home *** CD-ROM | disk | FTP | other *** search
- #!/bin/python
-
- """
- ToolGoogleAlerts.py
-
- David Janes
- BlogMatrix
- 2004.03.30
- """
-
import cStringIO
import os
import poplib
import pprint
import re
import sys
import time
import traceback
import urllib
import xml.sax.saxutils

import Tool
-
#
# Patterns used to pick apart Google News Alert e-mails.
#
# All four are compiled case-insensitively, with MULTILINE so that ^ and $
# anchor on individual lines of the mail body, and DOTALL so that the lazy
# (.*?) groups may span line breaks.  Raw strings keep the regex escapes
# (\s, \n, \.) intact for the regex engine.
#

# One news item: group(1) is a line starting with a non-blank character,
# group(2) is everything up to the next line of the form <...>, and
# group(3) is the text between those angle brackets.
block_re = r"""([^\s][^\n]*)$(.*?)^<([^\n]*)>$"""
block_rex = re.compile(block_re, re.I|re.MULTILINE|re.DOTALL)

# The link that follows "Remove this News Alert:"; group(2) is the URL.
# The dots of the host name are escaped so only www.google.com is accepted
# (an unescaped "." would also match look-alike hosts such as wwwXgoogle.com).
remove_re = r"""^Remove this News Alert:(.*?)(http://www\.google\.com/newsalerts[^\s]*?)$"""
remove_rex = re.compile(remove_re, re.I|re.MULTILINE|re.DOTALL)

# Recognizes a verification mail; intended to be applied to a subject line.
confirm_re = r"""News Alerts.*Verification Email"""
confirm_rex = re.compile(confirm_re, re.I|re.MULTILINE|re.DOTALL)

# The link that follows "Verify this News Alert request:"; group(2) is the
# URL.  Host dots are escaped for the same reason as in remove_re.
verify_re = r"""^Verify this News Alert request:(.*?)(http://www\.google\.com/newsalerts[^\s]*?)$"""
verify_rex = re.compile(verify_re, re.I|re.MULTILINE|re.DOTALL)
-
class ToolGoogleAlerts(Tool.ToolInterface):
    """
    The interface for your tool. Simply create an instance of this object and Jaeger
    will figure it out.

    The tool serves three pages ("/", "/setup" and "/alerts/feed", see
    get_server) and, from pulse(), polls a POP3 mailbox for Google News Alert
    e-mails.  Items found in those mails are kept in the 'alerts' user data
    entry, a mapping of

        alert subject -> ( [ ( time, url, title, {} ), ... ], { "remove": url } )

    which serve_feed() renders as an RSS feed.
    """

    # Address that alert and verification mails must come from.
    _ALERT_SENDER = 'newsalerts-noreply@google.com'

    # Subject prefix of an alert mail; the rest of the subject names the alert.
    _ALERT_SUBJECT_PREFIX = 'Google News Alert - '

    # Mails whose Date header is older than this many hours are ignored.
    _MAX_AGE_HOURS = 24 * 3

    # Minutes between mailbox polls unless JAEGER_ALERTS_INTERVAL overrides it.
    _DEFAULT_INTERVAL_MINUTES = 10

    def __init__(self):
        # WEBSERVER is not defined in this file -- presumably a constant
        # inherited from Tool.ToolInterface (TODO confirm).
        Tool.ToolInterface.__init__(self, self.WEBSERVER)

    def get_label(self, selected):
        """Return the display name of this tool ('selected' is not used)."""
        return "Google News Alerts"

    def get_server(self, path):
        """
        Map a request path to a ( handler, page title ) pair.

        Returns None when the path is not one this tool serves.
        """
        return {
            "/": ( self.serve_root, "" ),
            "/setup": ( self.serve_setup, "Setup Google News Alerts" ),
            "/alerts/feed": ( self.serve_feed, "Feed" ),
        }.get(path)

    #
    # helpers
    #
    def _get_alerts(self, operations):
        """Return the stored 'alerts' mapping, storing an empty one if none exists yet."""
        all_alerts = operations.get_userdata('alerts', None)
        if not all_alerts:
            all_alerts = {}
            operations.set_userdata('alerts', all_alerts)

        return all_alerts

    def _text_setting(self, operations, valuemap, key):
        """
        Resolve a text setting: a value present in the request is stored and
        returned, otherwise the stored value (default "") is returned.
        """
        value = valuemap.get(key)
        if value is None:
            return operations.get_userdata(key, "")

        operations.set_userdata(key, value)
        return value

    def _checkbox_setting(self, operations, valuemap, key, is_form, default=""):
        """
        Resolve a checkbox setting.

        A missing request value cannot be told apart from "not submitted", so
        the request is only trusted when 'is_form' says the form was posted;
        in that case a missing value is stored as "" (unchecked).  Otherwise
        the stored value (or 'default') is returned untouched.
        """
        if not is_form:
            return operations.get_userdata(key, default)

        value = valuemap.get(key, "")
        operations.set_userdata(key, value)
        return value

    def _checked(self, value):
        """Return the HTML 'checked' attribute for a truthy value, else ''."""
        if value:
            return "checked"
        return ""

    #
    # pages
    #
    def serve_root(self, operations, path, valuemap):
        """
        Serve the overview page: an introduction followed by the known alerts,
        split into those that are subscribed to and those that are not.

        Returns ( 200, "text/html", None, list of page fragments ).
        """
        # "&auml;" replaces the raw non-ASCII character: this file carries no
        # source encoding declaration, and the entity renders the same way
        # whatever charset the page is served with.
        result = [
            self.text_standard_header(path),
            """
<h2>Google News Alerts</h2>
<p class="first">
This extension lets you convert Google News Alerts arriving by e-mail
into syndication feeds for J&auml;ger.

<ul>
<li>
<a href="setup">Setup Google News Alerts</a> (do this once)

<li>
Create new alerts <b><a target=_blank href="http://www.google.com/newsalerts?hl=en">using Google</a></b>.

</ul>
""",
            """<h3>Your Alerts</h3>
<p class="first">
Subscribe to Alerts by dragging the <img align=bottom src="/images/xml.gif"> button to J&auml;ger's window.
""",
        ]

        #
        # subscriptions
        #
        all_alerts = self._get_alerts(operations)

        # A dict used as a set of the URLs of all weblogs known to Jaeger.
        blog_urls = {}
        for blog in operations.get_weblogs():
            blog_urls[blog.get("url", "")] = 1

        subscriptions = []
        for subject, ( alert_list, alert_map ) in all_alerts.items():
            quoted_subject = urllib.quote(subject)
            # Same URL that serve_feed() publishes as the channel <link>; an
            # alert counts as subscribed when a weblog carries that URL.
            google_url = "http://news.google.com/news?hl=en&q=%s" % quoted_subject
            feed_url = "alerts/feed?subject=%s" % quoted_subject
            subscriptions.append((
                subject,
                google_url,
                feed_url,
                alert_map.get("remove"),
                google_url in blog_urls,
            ))

        subscriptions.sort()
        for is_subscribed in [ True, False ]:
            if is_subscribed:
                result.append("<h4>Alerts that are subscribed to</h4><ul>")
            else:
                result.append("<h4>Alerts that are NOT subscribed to</h4><ul>")

            for subject, google_url, feed_url, cancel_url, subscribed in subscriptions:
                if subscribed != is_subscribed:
                    continue

                item = """<li><a href=%s><img align=bottom border=0 src="/images/xml.gif"></a> <a target=_blank href=%s>%s</a>""" % (
                    self.quote_attribute(feed_url),
                    self.quote_attribute(google_url),
                    self.escape_html(subject),
                )

                # The remove URL is only known once a mail containing it has
                # been processed; without it there is nothing to link to.
                if cancel_url:
                    item += """ [<a target=_blank href=%s>Cancel</a>]""" % \
                        self.quote_attribute(cancel_url)

                result.append(item)

            result.append("</ul>")

        result.append(self.text_standard_footer())

        return 200, "text/html", None, result

    def serve_setup(self, operations, path, valuemap):
        """
        Serve the setup form, storing any settings submitted with the request.

        Returns ( 200, "text/html", None, list of page fragments ).
        """
        # The form below has no "_form" field, so the Save button's own value
        # ("submit") is accepted as evidence of a submission too; otherwise
        # the checkbox settings would never be stored.
        is_form = valuemap.get("_form") or valuemap.get("submit")

        hostname = self._text_setting(operations, valuemap, "hostname")
        account = self._text_setting(operations, valuemap, "account")
        password = self._text_setting(operations, valuemap, "password")

        enabled = self._checkbox_setting(operations, valuemap, "enabled", is_form)
        # "delete" has no control on the form yet; it is still synchronised
        # here exactly as before, but its value is not displayed.
        self._checkbox_setting(operations, valuemap, "delete", is_form)
        autosubscribe = self._checkbox_setting(
            operations, valuemap, "autosubscribe", is_form, "on")

        # NOTE: the template is filled in with the % operator, so a literal
        # percent sign added to it must be written as %%.
        form = """\
<form method="POST">
<h3>Your E-Mail Address</h3>
<p class="first">
This is the e-mail address that you are telling Google News to send alerts to.

<p>
<table>
<tr>
<td align="left" width=120>E-Mail Address:</td>
<td></td>
<td><input type="text" name="account" value="%(account)s"></td>
</tr>

</table>

<h3>E-Mail Account Information</h3>
<h4>POP3 Information</h4>
<p class="first">
If you receive your e-mail using a POP3 mail account,
fill in the information here.
We recommend that you create a seperate POP3 account for Google
News Alerts, but if you can't (or don't know how to) do this, it's OK.

<p>
<table>
<tr>
<td align="left" width=120>Hostname:</td>
<td> </td>
<td><input type="text" name="hostname" value="%(hostname)s"></td>
</tr>

<tr>
<td align="left">Password:</td>
<td></td>
<td><input type="password" name="password" value="%(password)s"></td>
</tr>

</table>

<h4>Outlook Information</h4>
<p class="first">
If you receive your e-mail using Outlook (<b>not</b> Outlook Express),
fill in the information here. <i>Coming soon</i>.

<h3>Options</h3>
<p class="first">
This extension will not run until you click the "Enable" button.
If you select "Auto-subscribe",
you will be prompted as soon as any new Google News Alert e-mails are discovered
(i.e. you may be doing something else entirely!)

<p>
<table>
<tr>
<td align="left" width=120>Enable:</td>
<td></td>
<td><input type="checkbox" name="enabled" %(enabled)s></td>
</tr>

<tr>
<td align="left">Auto-subscribe:</td>
<td></td>
<td><input type="checkbox" name="autosubscribe" %(autosubscribe)s></td>
</tr>

<tr>
<td></td>
<td></td>
<td><input type="Submit" name="submit" value="Save"></td>
</tr>
</table>
</form>
""" % {
            "account": self.escape_html(account),
            "hostname": self.escape_html(hostname),
            "password": self.escape_html(password),
            "enabled": self._checked(enabled),
            "autosubscribe": self._checked(autosubscribe),
        }

        result = [
            self.text_standard_header(path),
            """
<h2>Setup Google News Alerts</h2>
<p class="first">
""",
            form,
        ]

        result.append(self.text_standard_footer())
        return 200, "text/html", None, result

    def serve_feed(self, operations, path, valuemap):
        """
        Serve the RSS 0.92 feed for the alert named by the "subject" request
        value, most recently stored item first.

        Returns ( 200, "application/xml", None, list of XML lines ), or a 404
        result when the subject is missing or unknown.
        """
        # NOTE(review): the 404 results below are 3-tuples while every 200
        # result in this class is a 4-tuple -- confirm which shape the caller
        # expects.  Left as it was.
        subject_alert = valuemap.get("subject", "")
        if not subject_alert:
            return 404, "text/html", self.text_filenotfound()

        sys.stderr.write("serve_feed: subject='%s'\n" % subject_alert)

        alert = self._get_alerts(operations).get(subject_alert)
        if not alert:
            return 404, "text/html", self.text_filenotfound()

        alert_list, alert_map = alert

        #
        # output the RSS
        #
        escape = xml.sax.saxutils.escape

        # The query string contains a "&", which has to be escaped to keep
        # the document well-formed XML.
        channel_link = "http://news.google.com/news?hl=en&q=%s" % \
            urllib.quote(subject_alert)

        result = [
            '<?xml version="1.0" encoding="iso-8859-1"?>',
            '<rss version="0.92">',
            '<channel>',
            "<title>Google News Alerts for %s</title>" % escape(subject_alert),
            "<link>%s</link>" % escape(channel_link),
        ]

        # Items are appended as mails are processed, so walking the list
        # backwards puts the latest additions first.
        for index in range(len(alert_list) - 1, -1, -1):
            itime, iurl, ititle, imap = alert_list[index]

            # The date is labelled GMT, so it has to be broken down with
            # gmtime() rather than localtime().
            ituple = time.gmtime(itime)

            result.append(' <item>')
            result.append(' <link>%s</link>' % escape(iurl))
            result.append(' <title>%s</title>' % escape(ititle))
            result.append(' <pubDate>%s</pubDate>' % \
                time.strftime("%a, %d %b %Y %H:%M:%S GMT", ituple))
            result.append(' </item>')

        result.append('</channel>')
        result.append('</rss>')

        return 200, "application/xml", None, result

    #
    # mail handling
    #
    def pulse(self, operations):
        """
        Poll the configured POP3 mailbox and process every message in it.

        Does nothing unless hostname, account and password are set and the
        tool is enabled, and at most once per polling interval (10 minutes,
        or the number of minutes given in the JAEGER_ALERTS_INTERVAL
        environment variable).  Errors raised while talking to the POP3
        server propagate to the caller.
        """
        hostname = operations.get_userdata("hostname", "")
        account = operations.get_userdata("account", "")
        password = operations.get_userdata("password", "")
        enabled = operations.get_userdata("enabled", "")
        if not ( hostname and account and password and enabled ):
            return

        try:
            interval_minutes = int(os.environ.get(
                'JAEGER_ALERTS_INTERVAL', str(self._DEFAULT_INTERVAL_MINUTES)))
        except ValueError:
            # not a number: fall back to the default interval
            interval_minutes = self._DEFAULT_INTERVAL_MINUTES

        last_attempt = operations.get_userdata("last_attempt", 0)
        if last_attempt + interval_minutes * 60 > time.time():
            return

        # Recorded before connecting, so a failing server is not retried on
        # every pulse.
        operations.set_userdata("last_attempt", time.time())

        sys.stderr.write("ToolGoogleAlerts\n")

        connection = poplib.POP3(hostname)
        try:
            connection.user(account)
            connection.pass_(password)

            message_count = len(connection.list()[1])
            for number in range(1, message_count + 1):
                # retr() returns ( response, lines, octets )
                lines = connection.retr(number)[1]
                self.process_message(operations, "\n".join(lines))
        finally:
            # Always end the session so the connection is not left open.
            # A failure to say goodbye is only reported, so that it cannot
            # hide an error raised above.
            try:
                connection.quit()
            except Exception:
                sys.stderr.write("ToolGoogleAlerts.pulse: could not close POP3 connection\n")

    def process_message(self, operations, message):
        """
        Process one raw e-mail message (headers and body as a single string).

        Mail that is not from the Google News Alerts sender, has no usable
        Date header, is more than three days old or has a non-string
        (multipart) payload is ignored.  A verification mail gets its
        verification link fetched; an alert mail gets its items stored.

        Never raises: anything that goes wrong is reported on stderr.
        """
        #
        # get the current alerts
        #
        all_alerts = self._get_alerts(operations)

        verified = operations.get_userdata('verified', None)
        if not verified:
            verified = {}
            operations.set_userdata('verified', verified)

        try:
            # imported here so that an import failure is reported by the
            # handler below like any other error
            import types
            import email
            import email.Utils

            msg = email.message_from_string(message)

            # parseaddr() copes with "Display Name <address>" style headers
            sender = email.Utils.parseaddr(msg.get('From') or '')[1]
            if sender.lower() != self._ALERT_SENDER:
                return

            date = msg.get('Date')
            if not date:
                return

            # The *_tz variants honour the UTC offset of the Date header, so
            # that tdate is a real timestamp whatever zone the mail came from.
            pdate = email.Utils.parsedate_tz(date)
            if not pdate:
                return

            tdate = email.Utils.mktime_tz(pdate)
            if (time.time() - tdate) / 3600 > self._MAX_AGE_HOURS:
                return

            payload = msg.get_payload()
            if not isinstance(payload, types.StringTypes):
                return

            subject = msg.get('Subject') or ''

            # NOTE(review): match() only succeeds when the subject *starts*
            # with "News Alerts" -- confirm against a real verification mail.
            if confirm_rex.match(subject):
                self._verify_alert(operations, verified, payload)
            elif subject.startswith(self._ALERT_SUBJECT_PREFIX):
                subject_alert = subject[len(self._ALERT_SUBJECT_PREFIX):]
                self._store_alert(operations, all_alerts, subject_alert, tdate, payload)
        except Exception:
            sys.stderr.write("ToolGoogleAlerts.process_message: caught exception\n")
            traceback.print_exc(file = sys.stderr)

    def _verify_alert(self, operations, verified, payload):
        """
        Fetch the verification link found in 'payload', once per URL.

        URLs that were fetched successfully are remembered in 'verified'; a
        failed fetch is reported on stderr and tried again the next time the
        mail is seen.
        """
        match = verify_rex.search(payload)
        if not match:
            return

        url = match.group(2)
        if verified.get(url):
            return

        operations.log("verifying '%s'" % url)

        try:
            f = urllib.urlopen(url)
            try:
                f.read()
            finally:
                # close the handle even when reading fails
                f.close()

            verified[url] = 1
        except Exception:
            sys.stderr.write("ToolGoogleAlerts.process_message: caught exception\n")
            traceback.print_exc(file = sys.stderr)

    def _store_alert(self, operations, all_alerts, subject_alert, tdate, payload):
        """
        Add the news items found in 'payload' to the alert 'subject_alert'.

        Items whose URL is already stored are skipped.  New items are stamped
        with 'tdate'.  The alert's remove URL is updated when the mail
        contains one, and a subject that had no items before is subscribed
        to if the auto-subscribe option is on.
        """
        alert_list, alert_map = all_alerts.get(subject_alert, ( [], {} ))

        is_new_subject = not alert_list

        ## make a list of all URLs that we know about
        known_urls = {}
        for item in alert_list:
            known_urls[item[1]] = 1

        ## add new stuff
        for match in block_rex.finditer(payload):
            iurl = match.group(3)
            if known_urls.get(iurl):
                continue

            ititle = match.group(1)

            alert_list.append(( tdate, iurl, ititle, {} ))
            known_urls[iurl] = 1

            sys.stdout.write("ToolGoogleAlerts: %s %s\n" % ( iurl, ititle ))

        ## discover the 'remove url' for unsubscribing
        match = remove_rex.search(payload)
        if match:
            alert_map["remove"] = match.group(2)

        ## save updated values
        all_alerts[subject_alert] = ( alert_list, alert_map )

        # Consult the auto-subscribe option itself; "on" is the same default
        # the setup page uses when the option was never saved.
        if is_new_subject and operations.get_userdata("autosubscribe", "on"):
            feed_url = "%salerts/feed?subject=%s" % \
                ( self.tool_root(full=True), urllib.quote(subject_alert) )
            operations.subscribe_to(feed_url)
-
- #
- # Module-level side effect: the tool is instantiated when this file is
- # loaded.  Creating it will register it (per the class docstring, Jaeger
- # picks up created instances), so the instance is not kept in a variable.
- #
- ToolGoogleAlerts()
-